######################################
###                                ###
###   Women and Party Building     ###
###                                ###
###     code_helperfunctions.R     ###
###                                ###
######################################

# This script contains helper-functions that will be used in the analysis and data construction


# This functions groups the RDD results and calculates the difference in treatment effects between the women's and men's samples
difference.calculator <- function(mod1, mod2){
  group.1 <- c(mod1$coef[3], mod1$se[3])
  group.2 <- c(mod2$coef[3], mod2$se[3])
  Diff <- cbind(group.1[1] - group.2[1], group.1[1] - group.2[1] - 1.96*sqrt(group.1[2]^2 + group.2[2]^2), group.1[1] - group.2[1] + 1.96*sqrt(group.1[2]^2 + group.2[2]^2))
  results <- rbind(c(mod1$coef[3], mod1$ci[3,]),
                   c(mod2$coef[3], mod2$ci[3,]),
                   Diff)
  return(results)
}

# This function extracts the coefficient and confidence intervals from the rdrobust models
extract.results <- function(results){
  return(data.frame(coef=results$coef[3], lower=results$ci[3,1], upper=results$ci[3,2]))
}

# This function formats our RDD results into a table
results.formater <- function(results,  sample.name=""){
  coef <- results$coef[3]
  ci <- results$ci[3,]
  conf <- paste("[", round(ci[1], 2), ", ", round(ci[2],2), "]", sep="")
  p <- results$pv[3]
  h <- results$bws[1,1]
  n <- sum(results$N_h)
  return(data.frame(Model = sample.name, Estimate=round(coef, 3), Confidence = conf, p=round(p, 3), h=round(h, 2), n=n))
}


# This function is used to calculate the difference between the two coefficients in the two-way fixed effects models in the appendix
difference.coefficients <- function(mod){
  diff <- coef(mod)[1] - coef(mod)[2]
  se.diff <- sqrt(sum(mod$cse^2) - 2*(vcov(mod)[1,2]))
  t <- abs(qt(.025, df=mod$df))
  conf.int <- diff + c(-1, 1)*t*se.diff
  results <- c(diff, conf.int)
  names(results) <- c("Difference", "2.5 %", "97.5 %")
  return(results)
}


# The following functions are from the rdrobust package. 
# We repeat them here because we alter the format of the RD plots slightly, 
# and that function calls on these other functions.

qrXXinv = function(x, ...) {
  #tcrossprod(solve(qr.R(qr(x, tol = 1e-10)), tol = 1e-10))
  #tcrossprod(solve(qr.R(qr(x))))
  chol2inv(chol(crossprod(x))) 
}

qrreg = function(x,y,w,s2=0,var.comp=TRUE, ...) {
  M.X = sqrt(w)*x
  X.M.X_inv = qrXXinv(M.X) 
  X.M.Y = crossprod(M.X,sqrt(w)*y)
  beta.hat = X.M.X_inv%*%X.M.Y
  Psi.hat=Sigma.hat=0
  if (var.comp==TRUE) {
    Psi.hat = crossprod((w*s2*w)*x,x)
    Sigma.hat = crossprod(Psi.hat%*%X.M.X_inv,X.M.X_inv)
  }
  output = list(X.M.X_inv=X.M.X_inv, X.M.Y=X.M.Y, beta.hat=beta.hat, Psi.hat=Psi.hat, Sigma.hat=Sigma.hat)
  return(output)
}

rdrobust_kweight = function(X, c,  h,  kernel){
  u = (X-c)/h
  if (kernel=="epanechnikov" | kernel=="epa") {
    w = (0.75*(1-u^2)*(abs(u)<=1))/h
  }
  else if (kernel=="uniform" | kernel=="uni") {
    w = (0.5*(abs(u)<=1))/h
  }
  else {
    w = ((1-abs(u))*(abs(u)<=1))/h
  }
  return(w)	
}

rdrobust_res = function(X, y, T, Z, m, hii, vce, matches, dups, dupsid, d) {
  n = length(y)
  dT=dZ=0
  if (!is.null(T)) dT = 1
  if (!is.null(Z)) dZ = ncol(Z)
  res = matrix(NA,n,1+dT+dZ)  	
  
  if (vce=="nn") {
    for (pos in 1:n) {
      rpos = dups[pos] - dupsid[pos]
      lpos = dupsid[pos] - 1
      while (lpos+rpos < min(c(matches,n-1))) {
        if (pos-lpos-1 <= 0) rpos = rpos + dups[pos+rpos+1]
        else if (pos+rpos+1>n) lpos = lpos + dups[pos-lpos-1]
        else if ((X[pos]-X[pos-lpos-1]) > (X[pos+rpos+1]-X[pos])) rpos = rpos + dups[pos+rpos+1]
        else if ((X[pos]-X[pos-lpos-1]) < (X[pos+rpos+1]-X[pos])) lpos = lpos + dups[pos-lpos-1]
        else {
          rpos = rpos + dups[pos+rpos+1]
          lpos = lpos + dups[pos-lpos-1]
        }
      }
      ind_J = max(c(0,(pos-lpos))):min(c(n,(pos+rpos)))
      y_J   = sum(y[ind_J])-y[pos]
      Ji = length(ind_J)-1
      res[pos,1] = sqrt(Ji/(Ji+1))*(y[pos] - y_J/Ji)
      if (!is.null(T)) {
        T_J = sum(T[ind_J])-T[pos]
        res[pos,2] = sqrt(Ji/(Ji+1))*(T[pos] - T_J/Ji)
      }
      if (!is.null(Z)) {
        for (i in 1:dZ) {
          Z_J = sum(Z[ind_J,i])-Z[pos,i]
          res[pos,1+dT+i] = sqrt(Ji/(Ji+1))*(Z[pos,i] - Z_J/Ji)
        }
      }
    }		
  }
  else {
    if (vce=="hc0") w = 1
    else if (vce=="hc1") w = sqrt(n/(n-d))
    else if (vce=="hc2") w = sqrt(1/(1-hii))
    else                 w =      1/(1-hii)
    res[,1] = w*(y-m[,1])
    if (dT==1) res[,2] = w*(T-m[,2])
    if (dZ>0) {
      for (i in 1:dZ) {
        res[,1+dT+i] = w*(Z[,i]-m[,1+dT+i])
      }
    }
  }
  return(res)
}


rdrobust_bw = function(Y, X, T, Z, C, W, c, o, nu, o_B, h_V, h_B, scale, vce, nnmatch, kernel, dups, dupsid){
  dT = dZ = dC = eC = 0
  w = rdrobust_kweight(X, c, h_V, kernel)
  dW = length(W)
  if (dW>1) {
    w = W*w
  }
  
  ind_V = w> 0; eY = Y[ind_V];eX = X[ind_V];eW = w[ind_V]
  n_V = sum(ind_V)
  D_V = eY
  R_V = matrix(NA,n_V,o+1)
  for (j in 1:(o+1)) R_V[,j] = (eX-c)^(j-1)
  invG_V = qrXXinv(R_V*sqrt(eW))
  e_v = matrix(0,(o+1),1); e_v[nu+1]=1
  s = 1
  eT=eC=eZ=NULL
  if (!is.null(T)) {
    dT = 1
    eT = T[ind_V]
    D_V = cbind(D_V,eT)
  }
  if (!is.null(Z)) {
    dZ = ncol(Z)
    eZ = Z[ind_V,,drop=FALSE]
    D_V = cbind(D_V,eZ)
    U = crossprod(R_V*eW,D_V)
    ZWD  = crossprod(eZ*eW,D_V)
    colsZ = (2+dT):max(c(2+dT+dZ-1,(2+dT)))
    UiGU =  crossprod(matrix(U[,colsZ],nrow=o+1),invG_V%*%U) 
    ZWZ = ZWD[,colsZ] - UiGU[,colsZ] 
    ZWY = ZWD[,1:(1+dT)] - UiGU[,1:(1+dT)] 
    gamma = chol2inv(chol(ZWZ))%*%ZWY
    s = c(1 , -gamma[,1])
  }
  if (!is.null(C)) {
    dC = 1
    eC =  C[ind_V] 
  }
  beta_V = invG_V%*%crossprod(R_V*eW,D_V)	
  if (is.null(Z) & !is.null(T)) {	
    tau_Y = factorial(nu)*beta_V[nu+1,1]
    tau_T = factorial(nu)*beta_V[nu+1,2]
    s = c(1/tau_T , -(tau_Y/tau_T^2))
  }
  if (!is.null(Z) & !is.null(T)) {	
    s_T = c(1 , -gamma[,2])
    tau_Y = factorial(nu)*t(s)%*%  c(beta_V[nu+1,1],beta_V[nu+1,colsZ])
    tau_T = factorial(nu)*t(s_T)%*%c(beta_V[nu+1,2],beta_V[nu+1,colsZ])
    s = c(1/tau_T , -(tau_Y/tau_T^2) , -(1/tau_T)*gamma[,1] + (tau_Y/tau_T^2)*gamma[,2])
  }	
  dups_V=dupsid_V=predicts_V=0
  
  if (vce=="nn") {
    dups_V   = dups[ind_V]
    dupsid_V = dupsid[ind_V]
  }
  
  if (vce=="hc0" | vce=="hc1" | vce=="hc2" | vce=="hc3") {
    predicts_V=R_V%*%beta_V
    if (vce=="hc2" | vce=="hc3") {
      hii=matrix(NA,n_V,1)	
      for (i in 1:n_V) {
        hii[i] = R_V[i,]%*%invG_V%*%(R_V*eW)[i,]
      }
    }
  }	
  res_V = rdrobust_res(eX, eY, eT, eZ, predicts_V, hii, vce, nnmatch, dups_V, dupsid_V, o+1)
  V_V = (invG_V%*%rdrobust_vce(dT+dZ, s, R_V*eW, res_V, eC)%*%invG_V)[nu+1,nu+1]
  v = crossprod(R_V*eW,((eX-c)/h_V)^(o+1))
  Hp = 0
  for (j in 1:(o+1)) Hp[j] = h_V^((j-1))
  BConst = (Hp*(invG_V%*%v))[nu+1]
  
  w = rdrobust_kweight(X, c, h_B, kernel)
  if (dW>1) {
    w = W*w
  }
  ind = w> 0 
  n_B = sum(ind)
  eY = Y[ind];eX = X[ind];eW = w[ind]
  D_B = eY
  R_B = matrix(NA,n_B,o_B+1)
  for (j in 1:(o_B+1)) R_B[,j] = (eX-c)^(j-1)
  invG_B = qrXXinv(R_B*sqrt(eW))
  eT=eC=eZ=NULL
  if (!is.null(T)) {
    eT = T[ind]
    D_B = cbind(D_B,eT)
  }
  if (!is.null(Z)) {
    eZ = Z[ind,,drop=FALSE]
    D_B = cbind(D_B,eZ)
  }
  if (!is.null(C)) {
    eC=C[ind]
  }	
  beta_B = invG_B%*%crossprod(R_B*eW,D_B)	
  BWreg=0
  if (scale>0) {
    e_B = matrix(0,(o_B+1),1); e_B[o+2]=1
    dups_B=dupsid_B=hii=predicts_B=0
    if (vce=="nn") {
      dups_B   = dups[ind]
      dupsid_B = dupsid[ind]
    }
    if (vce=="hc0" | vce=="hc1" | vce=="hc2" | vce=="hc3") {
      predicts_B=R_B%*%beta_B
      if (vce=="hc2" | vce=="hc3") {
        hii=matrix(NA,n_B,1)	
        for (i in 1:n_B) {
          hii[i] = R_B[i,]%*%invG_B%*%(R_B*eW)[i,]
        }
      }
    }	
    res_B = rdrobust_res(eX, eY, eT, eZ, predicts_B, hii, vce, nnmatch, dups_B, dupsid_B,o_B+1)
    V_B = (invG_B%*%rdrobust_vce(dT+dZ, s, R_B*eW, res_B, eC)%*%invG_B)[o+2,o+2]
    BWreg = 3*BConst^2*V_B
  }
  B =  sqrt(2*(o+1-nu))*BConst%*%(t(s)%*%(beta_B[o+2,]))
  V = (2*nu+1)*h_V^(2*nu+1)*V_V
  R = scale*(2*(o+1-nu))*BWreg
  rate = 1/(2*o+3)
  output = list(V=V,B=B,R=R,rate=rate)
  return(output)
}

rdrobust_vce = function(d, s, RX, res, C) {	
  k = ncol(as.matrix(RX))
  M = matrix(0,k,k)
  n  = length(C)
  if (is.null(C)) {
    w = 1
    if (d==0){
      M  = crossprod(c(res)*RX)
    }
    else {
      for (i in 1:(1+d)) {
        SS = res[,i]*res
        for (j in 1:(1+d)) {
          M = M + crossprod(RX*(s[i]*s[j])*SS[,j],RX)
        }
      }
    }
  }
  else {	
    clusters = unique(C)
    g     = length(clusters)
    w=((n-1)/(n-k))*(g/(g-1))
    if (d==0){
      for (i in 1:g) {
        ind=C==clusters[i]
        Xi = RX[ind,,drop=FALSE]
        ri = res[ind,,drop=FALSE]
        M = M + crossprod(t(crossprod(Xi,ri)),t(crossprod(Xi,ri)))
      }
    }
    else {
      for (i in 1:g) {
        ind=C==clusters[i]
        Xi = RX[ind,,drop=FALSE]
        ri = res[ind,,drop=FALSE]
        for (l in 1:(1+d)) {	
          for (j in 1:(1+d)) {
            M = M + crossprod(t(crossprod(Xi,s[l]*ri[,l])),t(crossprod(Xi,s[j]*ri[,j])))
          }	
        }					
      }
    }
  }
  return(w*M)		
}


bwconst = function(p,v,kernel){
  if (kernel=="epanechnikov" | kernel=="epa" | kernel==3) {
    K.fun = function(u) {(0.75*(1-u^2)*(abs(u)<=1))}
  }
  else if (kernel=="uniform" | kernel=="uni" | kernel==2) {
    K.fun = function(u) {(0.5*(abs(u)<=1))}
  }
  else  {
    K.fun = function(u) {((1-abs(u))*(abs(u)<=1))}
  }
  p1 = p+1  
  Gamma_p = Phi_p = matrix(NA,p1,p1)
  Omega_pq = matrix(NA,p1,1)
  for (i in 1:p1) {
    Omega.fun = function(u) {K.fun(u)*(u^(p1))*(u^(i-1))}
    Omega_pq[i] = integrate(Omega.fun,lower=0,upper=1)$value
    for (j in 1:p1) {
      Gamma.fun = function(u) {K.fun(u)*(u^(i-1))*(u^(j-1))}
      Phi.fun   = function(u) {(K.fun(u)^2)*(u^(i-1))*(u^(j-1))}
      Gamma_p[i,j] = integrate(Gamma.fun,lower=0,upper=1)$value
      Phi_p[i,j] = integrate(Phi.fun,lower=0,upper=1)$value
    }
  }
  B_const = solve(Gamma_p)%*%Omega_pq
  V_const = solve(Gamma_p)%*%Phi_p%*%solve(Gamma_p)
  C1 = B_const[v+1,1]
  C2 = V_const[v+1,v+1]
  return(c(C1,C2))
}

rdvce= function(X,y,frd=NULL,p,h,matches,vce,kernel){
  m = matches+1
  n = length(X)
  p1 = p+1
  sigma = matrix(0,n,1)
  if (vce=="resid") {
    for (k in 1:n) {
      cutoff = matrix(X[k],n,1)
      cutoff1 = X[k]
      W = rdrobust_kweight(X,cutoff1,h,"kernel")
      ind=W>0
      if (sum(ind)>5) {
        w.p=W[ind]; X.p=X[ind]; y.p=y[ind]
        XX.p = matrix(c((X.p-cutoff1)^0, poly(X.p-cutoff1,degree=p,raw=T)),length(X.p),p+1)
        mu0_phat_y = qr.coef(qr(XX.p*sqrt(w.p), tol = 1e-10), sqrt(w.p)*y.p)[1]
        if (is.null(frd)) {
          sigma[k] = (y[k] - mu0_phat_y)^2
        }
        else if (!is.null(frd)) {
          z.p=frd[ind]
          out=qrreg(XX.p, z.p, w.p, var.comp=FALSE) 
          mu0_phat_z = out$beta.hat[1]
          sigma[k] = (y[k] - mu0_phat_y)*(frd[k] - mu0_phat_z)
        }
      }
    }
  }
  else  {
    #y_match_avg = z_match_avg = matrix(NA,n,1)
    for (k in 1:n) {
      diffx = abs(X - X[k])
      m.group = sort(unique(diffx))[2:m]
      ind = which(diffx %in% m.group)
      y_match_avg = z_match_avg = mean(y[ind])
      Ji = length(ind)
      if (is.null(frd)) {
        sigma[k] = (Ji/(Ji+1))*(y[k] - y_match_avg)^2
      } 
      else if (!is.null(frd)) {
        z_match_avg = mean(frd[ind])
        sigma[k] = (Ji/(Ji+1))*(y[k] - y_match_avg)*(frd[k] - z_match_avg)
      }
    }
  }
  return(sigma)
}

regconst = function(d,h){
  d2 = 2*d+1
  d1 = d+1
  mu = matrix(0,d2, 1)
  mu[1] = 1
  XX = matrix(0,d1,d1)
  for (j in 2:d2) {
    i = j-1
    if (j%%2==1) {
      mu[j] = (1/(i+1))*(h/2)^i
    }
  }
  for (j in 1:d1) {
    XX[j,] = t(mu[j:(j+d)])
  }
  invXX =solve(XX)
  return(invXX)
}


rdplot = function(y, x, c=0, p=4, nbins=NULL, binselect="esmv", scale=NULL, kernel = "uni", weights=NULL, h=NULL, covs=NULL, 
                  support=NULL, subset = NULL, hide=FALSE, ci=NULL, shade=FALSE, par=NULL, title=NULL, cex.main=1, x.label=NULL, y.label=NULL, 
                  x.lim=NULL, y.lim=NULL, col.dots=NULL, col.lines=NULL, width.lines=3, type.dots = NULL,...) {
  
  if (!is.null(subset)) {
    x <- x[subset]
    y <- y[subset]
  }
  na.ok <- complete.cases(x) & complete.cases(y)
  
  if (!is.null(covs)){
    covs=as.matrix(covs)
    dZ = ncol(covs)
    if (!is.null(subset))  covs <- subset(covs,subset)
    for (i in 1:dZ) {
      na.ok <- na.ok & complete.cases(covs[,i])  
    }
  } 
  
  x <- x[na.ok]
  y <- y[na.ok]
  
  if (!is.null(covs)) covs <- subset(covs,na.ok)
  
  
  x_min = min(x);	x_max = max(x)
  x_l = x[x<c]; x_r = x[x>=c]	
  y_l = y[x<c];	y_r = y[x>=c]
  
  if (!is.null(support)) {
    support_l = support[1]
    support_r = support[2]
    if (support_l<x_min) x_min = support_l
    if (support_r>x_max) x_max = support_r
  }
  
  range_l = c - x_min
  range_r = x_max - c
  
  n_l = length(x_l)
  n_r = length(x_r)
  n = n_l + n_r
  meth="es"
  
  if (is.null(scale)) {
    scale = scale_l = scale_r = 1  
  } else{
    if (length(scale)==1) scale_l = scale_r = scale
    if (length(scale)==2) {
      scale_l = scale[1]
      scale_r = scale[2]
    }
  }
  
  if (!is.null(nbins)) {
    if (length(nbins)==1) nbins_l = nbins_r = nbins
    if (length(nbins)==2) {
      nbins_l = nbins[1]
      nbins_r = nbins[2]
    }
  }
  
  if (is.null(h)) {
    h_l = range_l
    h_r = range_r
  } else{
    if (length(h)==1) h_l = h_r = h
    if (length(h)==2) {
      h_l = h[1]
      h_r = h[2]
    }
  }
  k=4
  
  flag_no_ci <- FALSE
  if (is.null(ci)) {
    ci<- 95
    flag_no_ci <- TRUE
  }
  
  kernel_type = "Uniform"
  if (kernel=="epanechnikov" | kernel=="epa") kernel_type = "Epanechnikov"
  if (kernel=="triangular" | kernel=="tri") kernel_type = "Triangular"
  
  
  #####********************* ERRORS
  exit=0
  if (c<=x_min | c>=x_max){
    print("c should be set within the range of x")
    exit = 1
  }
  
  if (kernel!="uni" & kernel!="uniform" & kernel!="tri" & kernel!="triangular" & kernel!="epa" & kernel!="epanechnikov" & kernel!="" ){
    print("kernel incorrectly specified")
    exit = 1
  }
  
  if (p<0 ){
    print("p should be a positive number")
    exit = 1
  }
  
  if (scale<=0 |scale_l<=0 |scale_r<=0){
    print("scale should be a positive number")
    exit = 1
  }
  
  p_ceiling = ceiling(p)/p
  
  if (p_ceiling!=1 & p>0) {
    print("p should be an integer number")
    exit = 1
  }
  
  if (exit>0) stop()
  
  ###################################################################
  ### Polynomial curve (order = p) ##################################
  ###################################################################
  R_p_l = matrix(NA,n_l,p+1);  R_p_r = matrix(NA,n_r,p+1)
  for (j in 1:(p+1)) {
    R_p_l[,j] = (x_l-c)^(j-1)
    R_p_r[,j] = (x_r-c)^(j-1)
  }
  
  W_h_l = rdrobust_kweight(x_l,c,h_l,kernel)
  W_h_r = rdrobust_kweight(x_r,c,h_r,kernel)
  
  n_h_l = sum(W_h_l>0)
  n_h_r = sum(W_h_r>0)
  
  if (!is.null(weights)) {
    fw_l=weights[x<c];  fw_r=weights[x>=c]
    W_h_l = fw_l*W_h_l;	W_h_r = fw_r*W_h_r
  }
  invG_p_l  = qrXXinv((sqrt(W_h_l)*R_p_l));	invG_p_r  = qrXXinv((sqrt(W_h_r)*R_p_r))
  
  if (is.null(covs)) {
    
    gamma_p1_l = qrXXinv((sqrt(W_h_l)*R_p_l))%*%crossprod(R_p_l*W_h_l, y_l)	
    gamma_p1_r = qrXXinv((sqrt(W_h_r)*R_p_r))%*%crossprod(R_p_r*W_h_r, y_r)
    
  } else {
    
    
    z_l  = covs[x<c,]; z_r  = covs[x>=c,]	
    D_l  = cbind(y_l,z_l); D_r = cbind(y_r,z_r)
    U_p_l = crossprod(R_p_l*W_h_l,D_l); U_p_r = crossprod(R_p_r*W_h_r,D_r)
    beta_p_l = invG_p_l%*%crossprod(R_p_l*W_h_l,D_l); beta_p_r = invG_p_r%*%crossprod(R_p_r*W_h_r,D_r); 
    
    ZWD_p_l  = crossprod(z_l*W_h_l,D_l)
    ZWD_p_r  = crossprod(z_r*W_h_r,D_r)
    colsZ = 2:max(c(2+dZ-1,2))
    UiGU_p_l =  crossprod(U_p_l[,colsZ],invG_p_l%*%U_p_l) 
    UiGU_p_r =  crossprod(U_p_r[,colsZ],invG_p_r%*%U_p_r) 
    ZWZ_p_l = ZWD_p_l[,colsZ] - UiGU_p_l[,colsZ] 
    ZWZ_p_r = ZWD_p_r[,colsZ] - UiGU_p_r[,colsZ]     
    ZWY_p_l = ZWD_p_l[,1] - UiGU_p_l[,1] 
    ZWY_p_r = ZWD_p_r[,1] - UiGU_p_r[,1]     
    ZWZ_p = ZWZ_p_r + ZWZ_p_l
    ZWY_p = ZWY_p_r + ZWY_p_l
    gamma_p = chol2inv(chol(ZWZ_p))%*%ZWY_p
    s_Y = c(1 ,  -gamma_p[,1])
    
    gamma_p1_l = t(s_Y%*%t(beta_p_l))
    gamma_p1_r = t(s_Y%*%t(beta_p_r))
  }
  
  ###############################################
  ### Preparte data for polynomial curve plot ###
  ###############################################
  
  nplot = 500
  x_plot_l = seq(c-h_l, c, length.out =nplot)
  x_plot_r = seq(c, c+h_r, length.out =nplot)
  rplot_l = matrix(NA,nplot,p+1);  rplot_r = matrix(NA,nplot,p+1)
  for (j in 1:(p+1)) {
    rplot_l[,j] = (x_plot_l-c)^(j-1)
    rplot_r[,j] = (x_plot_r-c)^(j-1)
  }
  
  y_hat_l = rplot_l%*%gamma_p1_l
  y_hat_r = rplot_r%*%gamma_p1_r
  
  ###############################################
  ### Optimal Bins (using polynomial order k) ###
  ###############################################
  
  rk_l = matrix(NA,n_l,(k+1))
  rk_r = matrix(NA,n_r,(k+1))
  
  for (j in 1:(k+1)) {
    rk_l[,j] = x_l^(j-1)
    rk_r[,j] = x_r^(j-1)
  }
  
  gamma_k1_l = qrXXinv(rk_l)%*%crossprod(rk_l, y_l)  
  gamma_k2_l = qrXXinv(rk_l)%*%crossprod(rk_l, y_l^2)
  gamma_k1_r = qrXXinv(rk_r)%*%crossprod(rk_r, y_r)  
  gamma_k2_r = qrXXinv(rk_r)%*%crossprod(rk_r, y_r^2)
  
  ### Bias w/sample
  mu0_k1_l = rk_l%*%gamma_k1_l
  mu0_k1_r = rk_r%*%gamma_k1_r
  mu0_k2_l = rk_l%*%gamma_k2_l
  mu0_k2_r = rk_r%*%gamma_k2_r
  drk_l = matrix(NA,n_l,k)
  drk_r = matrix(NA,n_r,k)
  for (j in 1:k) {
    drk_l[,j] = j*x_l^(j-1)
    drk_r[,j] = j*x_r^(j-1)
  }
  
  ind_l = order(x_l); ind_r = order(x_r)
  x_i_l = x_l[ind_l] 
  y_i_l = y_l[ind_l]
  
  x_i_r = x_r[ind_r] 
  y_i_r = y_r[ind_r]
  
  dxi_l=(x_i_l[2:length(x_i_l)]-x_i_l[1:(length(x_i_l)-1)])
  dxi_r=(x_i_r[2:length(x_i_r)]-x_i_r[1:(length(x_i_r)-1)])
  dyi_l=(y_i_l[2:length(y_i_l)]-y_i_l[1:(length(y_i_l)-1)])
  dyi_r=(y_i_r[2:length(y_i_r)]-y_i_r[1:(length(y_i_r)-1)])
  
  x_bar_i_l = (x_i_l[2:length(x_i_l)]+x_i_l[1:(length(x_i_l)-1)])/2
  x_bar_i_r = (x_i_r[2:length(x_i_r)]+x_i_r[1:(length(x_i_r)-1)])/2
  
  drk_i_l = matrix(NA,n_l-1,k);	rk_i_l  = matrix(NA,n_l-1,(k+1))
  drk_i_r = matrix(NA,n_r-1,k);	rk_i_r  = matrix(NA,n_r-1,(k+1))
  
  for (j in 1:(k+1)) {
    rk_i_l[,j] = x_bar_i_l^(j-1)
    rk_i_r[,j] = x_bar_i_r^(j-1)
  }
  
  for (j in 1:k) {
    drk_i_l[,j] = j*x_bar_i_l^(j-1)
    drk_i_r[,j] = j*x_bar_i_r^(j-1)
  }
  
  mu1_i_hat_l = drk_i_l%*%(gamma_k1_l[2:(k+1)])
  mu1_i_hat_r = drk_i_r%*%(gamma_k1_r[2:(k+1)])
  
  mu0_i_hat_l = rk_i_l%*%gamma_k1_l
  mu0_i_hat_r = rk_i_r%*%gamma_k1_r
  mu2_i_hat_l = rk_i_l%*%gamma_k2_l
  mu2_i_hat_r = rk_i_r%*%gamma_k2_r
  
  mu0_hat_l = rk_l%*%gamma_k1_l
  mu0_hat_r = rk_r%*%gamma_k1_r
  mu2_hat_l = rk_l%*%gamma_k2_l
  mu2_hat_r = rk_r%*%gamma_k2_r
  
  mu1_hat_l = drk_l%*%(gamma_k1_l[2:(k+1)])
  mu1_hat_r = drk_r%*%(gamma_k1_r[2:(k+1)])
  
  mu1_i_hat_l = drk_i_l%*%(gamma_k1_l[2:(k+1)])
  mu1_i_hat_r = drk_i_r%*%(gamma_k1_r[2:(k+1)])
  
  sigma2_hat_l_bar = mu2_i_hat_l - mu0_i_hat_l^2
  sigma2_hat_r_bar = mu2_i_hat_r - mu0_i_hat_r^2
  sigma2_hat_l = mu2_hat_l - mu0_hat_l^2
  sigma2_hat_r = mu2_hat_r - mu0_hat_r^2
  
  J.fun = function(B,V) {ceiling((((2*B)/V)*n)^(1/3))}
  var_y_l = var(y_l)
  var_y_r = var(y_r)
  
  B_es_hat_dw = c( ((c-x_min)^2/(12*n))*sum(mu1_hat_l^2),((x_max-c)^2/(12*n))*sum(mu1_hat_r^2))
  V_es_hat_dw = c((0.5/(c-x_min))*sum(dxi_l*dyi_l^2),(0.5/(x_max-c))*sum(dxi_r*dyi_r^2))
  V_es_chk_dw = c((1/(c-x_min))*sum(dxi_l*sigma2_hat_l_bar),(1/(x_max-c))*sum(dxi_r*sigma2_hat_r_bar))
  J_es_hat_dw = J.fun(B_es_hat_dw, V_es_hat_dw)
  J_es_chk_dw = J.fun(B_es_hat_dw, V_es_chk_dw)
  
  B_qs_hat_dw = c((n_l^2/(24*n))*sum(dxi_l^2*mu1_i_hat_l^2), (n_r^2/(24*n))*sum(dxi_r^2*mu1_i_hat_r^2))
  V_qs_hat_dw = c((1/(2*n_l))*sum(dyi_l^2),(1/(2*n_r))*sum(dyi_r^2))
  V_qs_chk_dw = c((1/n_l)*sum(sigma2_hat_l), (1/n_r)*sum(sigma2_hat_r))
  J_qs_hat_dw = J.fun(B_qs_hat_dw, V_qs_hat_dw)
  J_qs_chk_dw = J.fun(B_qs_hat_dw, V_qs_chk_dw)
  
  J_es_hat_mv  = c(ceiling((var_y_l/V_es_hat_dw[1])*(n/log(n)^2)), ceiling((var_y_r/V_es_hat_dw[2])*(n/log(n)^2)))
  J_es_chk_mv  = c(ceiling((var_y_l/V_es_chk_dw[1])*(n/log(n)^2)), ceiling((var_y_r/V_es_chk_dw[2])*(n/log(n)^2)))
  J_qs_hat_mv  = c(ceiling((var_y_l/V_qs_hat_dw[1])*(n/log(n)^2)), ceiling((var_y_r/V_qs_hat_dw[2])*(n/log(n)^2)))
  J_qs_chk_mv  = c(ceiling((var_y_l/V_qs_chk_dw[1])*(n/log(n)^2)), ceiling((var_y_r/V_qs_chk_dw[2])*(n/log(n)^2)))
  
  #########################################################
  if (binselect=="es") {
    J_star_orig = J_es_hat_dw
    meth="es"
    binselect_type="IMSE-optimal evenly-spaced method using spacings estimators"
    J_IMSE = J_es_hat_dw
    J_MV   = J_es_hat_mv
  }
  if (binselect=="espr") {
    J_star_orig = J_es_chk_dw
    meth="es"
    binselect_type="IMSE-optimal evenly-spaced method using polynomial regression"
    J_IMSE = J_es_chk_dw
    J_MV   = J_es_chk_mv
  }
  if (binselect=="esmv" ) {
    J_star_orig = J_es_hat_mv
    meth="es"
    binselect_type="mimicking variance evenly-spaced method using spacings estimators"
    J_IMSE = J_es_hat_dw
    J_MV   = J_es_hat_mv
  }
  if (binselect=="esmvpr" ) {
    J_star_orig = J_es_chk_mv
    meth="es"
    binselect_type="mimicking variance evenly-spaced method using polynomial regression"
    J_IMSE = J_es_chk_dw
    J_MV   = J_es_chk_mv
  }
  if (binselect=="qs" ) {
    J_star_orig = J_qs_hat_dw
    meth="qs"
    binselect_type="IMSE-optimal quantile-spaced method using spacings estimators"
    J_IMSE = J_qs_hat_dw
    J_MV   = J_qs_hat_mv
  }
  if (binselect=="qspr" ) {
    J_star_orig = J_qs_chk_dw
    meth="qs"
    binselect_type="IMSE-optimal quantile-spaced method using polynomial regression"
    J_IMSE = J_qs_chk_dw
    J_MV   = J_qs_chk_mv
  }
  if (binselect=="qsmv" ) {
    J_star_orig = J_qs_hat_mv
    meth="qs"
    binselect_type="mimicking variance quantile-spaced method using spacings estimators"
    J_IMSE = J_qs_hat_dw
    J_MV   = J_qs_hat_mv
  }
  if (binselect=="qsmvpr" ) {
    J_star_orig = J_qs_chk_mv
    meth="qs"
    binselect_type="mimicking variance quantile-spaced method using polynomial regression"
    J_IMSE = J_qs_chk_dw
    J_MV   = J_qs_chk_mv
  }
  
  J_star_l = scale_l*J_star_orig[1]
  J_star_r = scale_r*J_star_orig[2]
  
  if (!is.null(nbins)) {
    J_star_l = nbins_l
    J_star_r = nbins_r
    binselect_type="manually evenly spaced"
  }
  
  if (var_y_l==0) {
    J_star_l = J_star_l_orig = 1
    print("Warning: not enough variability in the outcome variable below the threshold")
  }
  
  if (var_y_r==0) {
    J_star_r = J_star_r_orig = 1
    print("Warning: not enough variability in the outcome variable above the threshold")
  }
  
  rscale_l = J_star_l / J_IMSE[1]
  rscale_r = J_star_r / J_IMSE[2]
  
  bin_x_l = rep(0,length(x_l)); bin_x_r = rep(0,length(x_r))
  jump_l = range_l/J_star_l;jump_r = range_r/J_star_r;
  
  if (meth=="es") {
    jumps_l=seq(x_min,c,jump_l)
    jumps_r=seq(c,x_max,jump_r)
    #binselect_type="Evenly-Spaced"
  }   else if (meth=="qs") {
    jumps_l=quantile(x_l,probs=seq(0,1,1/J_star_l))
    jumps_r=quantile(x_r,probs=seq(0,1,1/J_star_r))
    # binselect_type="Quantile-Spaced"
  }
  
  for (k in 1:(J_star_l-1)) bin_x_l[x_l>=jumps_l[k] & x_l<jumps_l[k+1]] = -J_star_l+k-1 
  bin_x_l[x_l>=jumps_l[(J_star_l)]] = -1
  for (k in 1:(J_star_r-1)) bin_x_r[x_r>=jumps_r[k] & x_r<jumps_r[k+1]] = k 
  bin_x_r[x_r>=jumps_r[(J_star_r)]] = J_star_r
  
  rdplot_mean_bin_l=rdplot_mean_x_l=rdplot_mean_y_l=rep(0,J_star_l)
  rdplot_mean_bin_r=rdplot_mean_x_r=rdplot_mean_y_r=rep(0,J_star_r)
  
  for (k in 1:(J_star_l)) {
    rdplot_mean_bin_l[k]          = mean(c(jumps_l[k],jumps_l[k+1]))
    rdplot_mean_x_l[k]            = mean(x_l[bin_x_l==-k])
    rdplot_mean_y_l[k] = mean(y_l[bin_x_l==-k])
  }
  
  rdplot_mean_y_l = rev(rdplot_mean_y_l)
  rdplot_mean_x_l = rev(rdplot_mean_x_l)
  
  for (k in 1:(J_star_r)) {
    rdplot_mean_bin_r[k]  = mean(c(jumps_r[k],jumps_r[k+1]))
    rdplot_mean_x_r[k]    = mean(x_r[bin_x_r==k])
    rdplot_mean_y_r[k]    = mean(y_r[bin_x_r==k]) 
  }
  
  rdplot_mean_bin_l[J_star_l]=mean(c(jumps_l[J_star_l],c))
  rdplot_mean_bin_r[J_star_r]=mean(c(jumps_r[J_star_r],x_max))
  
  bin_x = c(bin_x_l,bin_x_r)
  rdplot_mean_bin = c(rdplot_mean_bin_l, rdplot_mean_bin_r)
  rdplot_mean_x   = c(rdplot_mean_x_l,   rdplot_mean_x_r)
  rdplot_mean_y   = c(rdplot_mean_y_l,   rdplot_mean_y_r)
  
  rdplot_sd_y_l=rdplot_N_l=rdplot_sd_y_r=rdplot_N_r=0
  for (j in 1:(J_star_l)) {
    rdplot_sd_y_l[j] =     sd(y_l[bin_x_l==-j])
    rdplot_N_l[j]    = length(y_l[bin_x_l==-j])
  }
  
  for (j in 1:(J_star_r)) {
    rdplot_sd_y_r[j] =     sd(y_r[bin_x_r==j])
    rdplot_N_r[j]    = length(y_r[bin_x_r==j])
  }
  
  rdplot_sd_y_l[is.na(rdplot_sd_y_l)]=0
  rdplot_sd_y_r[is.na(rdplot_sd_y_r)]=0
  rdplot_sd_y=c(rev(rdplot_sd_y_l),rdplot_sd_y_r)
  rdplot_N=c(rev(rdplot_N_l),rdplot_N_r)
  #quant = -qnorm(((1-(ci/100))/2))
  quant = -qt((1-(ci/100))/2,max(rdplot_N-1,1))
  rdplot_se_y <- rdplot_sd_y/sqrt(rdplot_N)
  rdplot_cil_bin = rdplot_mean_y - quant*rdplot_se_y
  rdplot_cir_bin = rdplot_mean_y + quant*rdplot_se_y
  
  if (hide=="FALSE") {
    
    if (is.null(col.lines)) col.lines = "black"
    if (is.null(col.dots))  col.dots  = "gray70"
    if (is.null(type.dots)) type.dots = 20
    if (is.null(title)) title="RD Plot"
    if (is.null(x.label)) x.label="X axis"
    if (is.null(y.label)) y.label="Y axis"
    #if (is.null(x.lim)) x.lim=c(min(x_l),max(x_r))
    #if (is.null(y.lim)) y.lim=c(min(c(y_l,y_r)),max(c(y_l,y_r)))
    #if (is.null(y.lim)) y.lim=c(min(rdplot_mean_y),max(rdplot_mean_y))
    
    par=par
    if (flag_no_ci==TRUE) {
      plot(rdplot_mean_bin,rdplot_mean_y, main=title, cex.main=cex.main, xlab=x.label, ylab=y.label, ylim=y.lim, xlim=x.lim, col=col.dots, pch=type.dots,...)
      lines(x_plot_l,y_hat_l,type="l",col=col.lines, lwd=width.lines) 
      lines(x_plot_r,y_hat_r,type="l",col=col.lines, lwd=width.lines) 
      abline(v=c)
    } else {
      if (shade==TRUE){
        plot(rdplot_mean_bin,rdplot_mean_y, main=title, cex.main=cex.main, xlab=x.label, ylab=y.label, ylim=y.lim, xlim=x.lim, col=col.dots, pch=type.dots,...)
        polygon(c(rev(rdplot_mean_bin),rdplot_mean_bin),c(rev(rdplot_cil_bin),rdplot_cir_bin),col = "grey75")
        lines(x_plot_l,y_hat_l,type="l",col=col.lines, lwd=width.lines) 
        lines(x_plot_r,y_hat_r,type="l",col=col.lines, lwd=width.lines)  
        abline(v=c)
      } else {
        plot(rdplot_mean_bin,rdplot_mean_y, main=title, cex.main=cex.main, xlab=x.label, ylab=y.label, ylim=y.lim, xlim=x.lim, col=col.dots, pch=type.dots,...)
        arrows(rdplot_mean_bin,rdplot_cil_bin,rdplot_mean_bin,rdplot_cir_bin,code=3,length=0.1,angle=90,col='grey')
        lines(x_plot_l,y_hat_l,type="l",col=col.lines, lwd=width.lines) 
        lines(x_plot_r,y_hat_r,type="l",col=col.lines, lwd=width.lines) 
        abline(v=c)
      }
    }
  }
  
  cutoffs = c(jumps_l,jumps_r[2:length(jumps_r)])
  rdplot_min_bin = cutoffs[1:(length(cutoffs)-1)]
  rdplot_max_bin = cutoffs[2:length(cutoffs)]
  
  bin_length = rdplot_max_bin-rdplot_min_bin
  bin_avg_l =   mean(bin_length[1:J_star_l])
  bin_med_l = median(bin_length[1:J_star_l])
  
  bin_avg_r =   mean(bin_length[(J_star_l+1):length(bin_length)])
  bin_med_r = median(bin_length[(J_star_l+1):length(bin_length)])
  
  vars_bins = data.frame("rdplot_mean_bin"=rdplot_mean_bin,"rdplot_mean_x"=rdplot_mean_x, "rdplot_mean_y"=rdplot_mean_y, "rdplot_min_bin"=rdplot_min_bin, "rdplot_max_bin"=rdplot_max_bin, "rdplot_se_y"=rdplot_se_y, "rdplot_N"=rdplot_N, "rdplot_ci_l"=rdplot_cil_bin, "rdplot_ci_r"=rdplot_cir_bin)
  vars_poly = data.frame("rdplot_x"= c(x_plot_l, x_plot_r), "rdplot_y"= c(y_hat_l, y_hat_r))
  
  coef = cbind(gamma_p1_l,gamma_p1_r)
  colnames(coef)=c("Left","Right")
  out=list(coef=coef, vars_bins=vars_bins, vars_poly=vars_poly,
           J=c(J_star_l,J_star_r), J_IMSE=J_IMSE, J_MV=J_MV, 
           scale=c(scale_l,scale_r), rscale=c(rscale_l,rscale_r),
           bin_avg=c(bin_avg_l,bin_avg_r), bin_med=c(bin_med_l,bin_med_r),
           p=p, c=c, h=c(h_l,h_r), N=c(n_l,n_r), Nh=c(n_h_l,n_h_r), binselect=binselect_type, kernel=kernel_type)
  
  out$call <- match.call()
  class(out) <- "rdplot"
  return(invisible(out))
}

print.rdplot <- function(x,...){
  cat("Call: rdplot\n\n")
  
  cat(paste("Number of Obs.           ",  format(sprintf("%10.0f",x$N[1]+x$N[2], width=10, justify="right")),"\n", sep=""))
  cat(paste("Kernel                   ",  format(x$kernel,   width=10, justify="right"),"\n", sep=""))
  cat("\n")
  cat(paste("Number of Obs.           ",  format(sprintf("%9.0f",x$N[1],     width=10, justify="right")),  "      ", format(sprintf("%9.0f",x$N[2],     width=10, justify="right")), "\n", sep=""))
  cat(paste("Eff. Number of Obs.      ",  format(sprintf("%9.0f",x$Nh[1],    width=10, justify="right")),  "      ", format(sprintf("%9.0f",x$Nh[2],    width=10, justify="right")), "\n", sep=""))
  cat(paste("Order poly. fit (p)      ",  format(sprintf("%9.0f",x$p,        width=10, justify="right")),  "      ", format(sprintf("%9.0f",x$p,        width=10, justify="right")), "\n", sep=""))
  cat(paste("BW poly. fit (h)         ",  format(sprintf("%9.3f",x$h[1],     width=10, justify="right")),  "      ", format(sprintf("%9.3f",x$h[2],     width=10, justify="right")), "\n", sep=""))
  cat(paste("Number of bins scale     ",  format(sprintf("%9.0f",x$scale[1], width=10, justify="right")),  "      ", format(sprintf("%9.0f",x$scale[2], width=10, justify="right")), "\n", sep=""))
  cat("\n")
}

summary.rdplot <- function(object,...) {
  x    <- object
  args <- list(...)
  
  cat("Call: rdplot\n\n")
  
  cat(paste("Number of Obs.           ",  format(sprintf("%10.0f",x$N[1]+x$N[2], width=10, justify="right")),"\n", sep=""))
  cat(paste("Kernel                   ",  format(x$kernel,   width=10, justify="right"),"\n", sep=""))
  cat("\n")
  cat(paste("Number of Obs.           ",  format(sprintf("%9.0f",x$N[1],     width=10, justify="right")),  "      ", format(sprintf("%9.0f",x$N[2],     width=10, justify="right")),        "\n", sep=""))
  cat(paste("Eff. Number of Obs.      ",  format(sprintf("%9.0f",x$Nh[1],   width=10, justify="right")),  "      ", format(sprintf("%9.0f",x$Nh[2],   width=10, justify="right")),        "\n", sep=""))
  cat(paste("Order poly. fit (p)      ",  format(sprintf("%9.0f",x$p,    width=10, justify="right")),  "      ", format(sprintf("%9.0f",x$p,    width=10, justify="right")),       "\n", sep=""))
  cat(paste("BW poly. fit (h)         ",  format(sprintf("%9.3f",x$h[1], width=10, justify="right")),  "      ", format(sprintf("%9.3f",x$h[2],  width=10, justify="right")),        "\n", sep=""))
  cat(paste("Number of bins scale     ",  format(sprintf("%9.0f",x$scale[1], width=10, justify="right")),  "      ", format(sprintf("%9.0f",x$scale[2], width=10, justify="right")),    "\n", sep=""))
  cat("\n")
  cat(paste("Bins Selected            ",  format(sprintf("%9.0f",x$J[1],       width=10, justify="right")),  "      ", format(sprintf("%9.0f",x$J[2], width=10, justify="right")),        "\n", sep=""))
  cat(paste("Average Bin Length       ",  format(sprintf("%9.3f",x$bin_avg[1], width=10, justify="right")),  "      ", format(sprintf("%9.3f",x$bin_avg[2], width=10, justify="right")),  "\n", sep=""))
  cat(paste("Median Bin Length        ",  format(sprintf("%9.3f",x$bin_med[1], width=10, justify="right")),  "      ", format(sprintf("%9.3f",x$bin_med[2], width=10, justify="right")),  "\n", sep=""))
  cat("\n")
  cat(paste("IMSE-optimal bins        ",  format(sprintf("%9.0f",x$J_IMSE[1], width=10, justify="right")),  "      ", format(sprintf("%9.0f",x$J_IMSE[2], width=10, justify="right")),   "\n", sep=""))
  cat(paste("Mimicking Variance bins  ",  format(sprintf("%9.0f",x$J_MV[1],   width=10, justify="right")),  "      ", format(sprintf("%9.0f",x$J_MV[2], width=10, justify="right")),     "\n", sep=""))
  cat("\n")
  cat(paste("Relative to IMSE-optimal:",   "\n", sep=""))
  cat(paste("Implied scale            ",  format(sprintf("%9.3f",x$rscale[1], width=10, justify="right")),  "      ", format(sprintf("%9.3f",x$rscale[2], width=10, justify="right")),   "\n", sep=""))
  cat(paste("WIMSE variance weight    ",  format(sprintf("%9.3f",1/(1+x$rscale[1]^3), width=10, justify="right")),  "      ", format(sprintf("%9.3f",1/(1+x$rscale[2]^3), width=10, justify="right")),   "\n", sep=""))
  cat(paste("WIMSE bias weight        ",  format(sprintf("%9.3f",x$rscale[1]^3/(1+x$rscale[1]^3), width=10, justify="right")),  "      ", format(sprintf("%9.3f",x$rscale[2]^3/(1+x$rscale[2]^3), width=10, justify="right")),   "\n", sep=""))
  cat("\n")
}



